*Replication do-file for Antman, Francisca M. 2022.  "For Want of a Cup: The Rise of Tea in England and the Impact of Water Quality on Mortality." Forthcoming, Review of Economics and Statistics.
*See README file.  First collect data sets in README file.  Then run this do file: Initial Set up Replication.do.
*After this, run Replication Tables.do.


/*****************************************************************************************************************************/
/********THIS FILE SETS UP THE PARISH MORTALITY SET AND MERGES WITH TEA DATA AND OTHER PARISH CHARACTERISTICS*****************/
/*****************************************************************************************************************************/

set more off
clear all

*Change this:
cd "C:\Users\Tea Paper"

cap log close
log using "logfiles\InitialSetupRep.smcl", replace


/*TEA*/
clear
insheet using "East India Company\imports_tea.csv"
rename china tea
replace tea=tea/1000000
tsset year, yearly  /*National yearly tea data*/
save "East India Company\Tea Imports.dta", replace



/*OTHER IMPORTS--The miscellaneous imports seem to have been a fairly consistent category for the company, so despite being labeled "miscellaneous", they may
still be usable in a falsification test. See the guide included with the East India Company data.*/
clear
insheet using "East India Company\imports_miscellaneous.csv"
drop bengal
drop madras
drop bombay
drop canton
drop benkulen
drop mocha
drop balambangan
ren total miscellaneous
replace miscellaneous=miscellaneous/1000000
tsset year, yearly  
save "East India Company\Miscellaneous Imports.dta", replace



/*Parish Characteristics */

use "statadata\parish characteristics_original.dta", clear
destring  hpewcode, replace  

*Rename old altitude variable--it will be replaced by mean altitude from GIS
rename altitude WSaltitude
label var WSaltitude "altitude from original parish data set"

sort hpewcode
save "statadata\Parish Characteristics.dta", replace


/*Parish Mortality Data*/

use "statadata\Parish Mortality (monthly).dta", clear
 
drop  baporig marorig burorig
*This drops the original, un-adjusted figures.

sort hpewcode year

*Several of the parish id variables (hpewcode) had letters in them. ("hpew" stands for historic parishes of England and Wales).
replace hpewcode="22079" if hpewcode=="22079A"
replace hpewcode="121334" if hpewcode=="12/334A"
replace hpewcode="12314" if hpewcode=="12314A"
replace hpewcode="14326" if hpewcode=="14326B"
replace hpewcode="16132" if hpewcode=="16132A"
replace hpewcode="22024" if hpewcode=="22024B"
replace hpewcode="22762" if hpewcode=="22762A"
replace hpewcode="29266" if hpewcode=="29266A"
replace hpewcode="31415" if hpewcode=="31415A"
replace hpewcode="25510" if hpewcode=="25510A"
replace hpewcode="25649" if hpewcode=="25649A/B"
replace hpewcode="32197" if hpewcode=="32197A"
replace hpewcode="32301" if hpewcode=="32301A"
replace hpewcode="33042" if hpewcode=="33042A"
replace hpewcode="34110" if hpewcode=="34110"
replace hpewcode="34110" if hpewcode=="34110A"
replace hpewcode="34471" if hpewcode=="34471B"
replace hpewcode="36040" if hpewcode=="36040A"
replace hpewcode="43393" if hpewcode=="43393A"
replace hpewcode="43704" if hpewcode=="43704A"
replace hpewcode="32483" if hpewcode=="32483A"
replace hpewcode="25441" if hpewcode=="25441A"
replace hpewcode="22763" if hpewcode=="22763A"
replace hpewcode="13270" if hpewcode=="13270A"
replace hpewcode="02218" if hpewcode=="02218A"
replace hpewcode="01026" if hpewcode=="01026A"
replace hpewcode="01068" if hpewcode=="01068A"
replace hpewcode="15278" if hpewcode=="15278A"
replace hpewcode="33217" if hpewcode=="33217A"
replace hpewcode="39132" if hpewcode=="39132A"

destring  hpewcode, replace


*************************************************

foreach var of varlist bap mar bur {
	preserve
	egen varmiss=rowmiss(`var')
	sort hpewcode year
	by hpewcode year: egen maxvarmiss=max(varmiss)
	count if varmiss!=maxvarmiss  /*Looks like if a month's value is missing then the entire year's value is missing*/
	tab varmiss maxvarmiss
	drop if maxvarmiss>0
	drop maxvarmiss varmiss

	collapse (sum) `var', by(hpewcode year)
	sort hpewcode year
	save "annual`var'.dta", replace
	restore
}

clear 
use "statadata\annualbap.dta", clear
merge 1:1 hpewcode year using "annualmar.dta"
tab _merge
drop _merge
sort hpewcode year
merge 1:1 hpewcode year using "annualbur.dta"
tab _merge
drop _merge
sort hpewcode year


xtset hpewcode year, yearly

*Merge in the Parish Characteristics to the master dataset (Parish Mortality (annual))

merge m:1 hpewcode using "statadata\Parish Characteristics.dta"


tab _merge
drop _merge

ren m_t_1700 market_town
ren nmt1700m dist_to_market 
label var market_town "Market town in 1700"

rename m_t_1640 market_town1640
replace market_town1640=0 if market_town==2  




tab market_town  /*note 9 is missing*/
replace market_town=0 if market_town==2
replace market_town=. if market_town>2

/*5. Parish Populations: Set up population variable following Wachter (1998), uses lagged birth, marriage, and death rates to infer a population. */

tsset  
foreach var of varlist bap mar{
	foreach x of numlist 1/19 {
		gen l`x'`var'=l`x'.`var'
	}
	egen nonmisslags`var'=rownonmiss(`var' l1`var'-l19`var')
	egen lagsum`var'=rowtotal(`var' l1`var'-l19`var'), missing 
	gen smooth`var'=lagsum`var'/nonmisslags`var'
	drop l1`var'-l19`var' lagsum`var'
} 


foreach x of numlist 1/19 {
	gen l`x'bur=l`x'.bur
}
egen nonmisslagsbur=rownonmiss(l1bur-l19bur)
egen lagsumbur=rowtotal(l1bur-l19bur), missing 
gen smoothbur=lagsumbur/nonmisslagsbur
drop l1bur-l19bur lagsumbur 

gen pop1=.4*smoothbap/.030 +.4*smoothbur/.025 + .2*smoothmar/.008
label var pop1 "Population based on Wachter formula"




*Population Densities and Mortality Rates:

foreach X of numlist 1 {
	gen density`X'=pop`X'/area
	gen death_rate`X'=bur/(pop`X'/1000)
}


xtset /*again, just sorting*/

*MERGE IN TEA DATA
merge m:1 year using "East India Company\Tea Imports.dta"
drop _merge



*MERGE IN MISC IMPORTS
merge m:1 year using "East India Company\Miscellaneous Imports.dta"
drop _merge




*MERGE IN COASTAL WHICH IS A CATEGORICAL VARIABLE
merge m:1 hpewcode using "Coastal Variable.dta"
drop _merge

/*4. Coastal Variable defined using Parish Guide and GIS maps. The coastal variable is defined as follows:

0: Landlocked county
1: Landlocked parish in coastal county
2: Landlocked parish within 10km of ocean or tidal river.
3: Landlocked parish within 5km of ocean or tidal river.
4: Parish is on coast

*/

*NEED TO CHANGE COASTAL CATEGORICAL VAR INTO INDICATOR
tab coastal, gen(coast)  /*generates set of dummies for coast from 1 to 5 with 5 being on coast*/
gen nearcoast1=1 if coast4==1 | coast5==1  
replace nearcoast1=0 if nearcoast1==.
gen nearcoast2=1 if coast3==1 | coast4==1 |coast5==1  
replace nearcoast2=0 if nearcoast2==.
label var nearcoast1 "Parish on coast or within 5 km of coast: coast4 or coast5"
label var nearcoast2 "Parish on coast or within 10 km of coast: coast3 or coast4 or coast5" 
rename coast5 oncoast
label var oncoast "Parish is on coast: coast5 coastal 5"
rename coast1 landlocked
label var landlocked "Parish is in landlocked county: coast1 coastal 0"

/*SOIL QUALITY*/

gen low_retention=0 if so_type<13000

replace low_retention=1 if so_type>=13000 & so_type<14000
replace low_retention=0 if so_type>=14000 & so_type<16000
replace low_retention=1 if so_type>=16000 & so_type<17000
replace low_retention=0 if so_type>=17000 & so_type<18000
replace low_retention=1 if so_type>=18000 & so_type<21000
replace low_retention=0 if so_type>=21000


xtset
gen lag_tea=l.tea  

gen lag_misc=l.miscellaneous


rename dist_to_market market_dist


compress
save "statadata\ParishMortalityAnnual.dta", replace
cap log close
